To run this code in my project using the renv environment, run the following lines of code
install.packages("renv") #install the package on the new computer (may not be necessary if renv bootstraps itself as expected)
renv::restore() #reinstall all the package versions in the renv lockfile
require("genefilter")
require("DESeq2")
require("apeglm")
require("ashr")
require("ggplot2")
require("vsn")
require("hexbin")
require("pheatmap")
require("RColorBrewer")
require("EnhancedVolcano")
require("rtracklayer")
require("tidyverse")
sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices datasets utils methods
## [8] base
##
## other attached packages:
## [1] lubridate_1.9.3 forcats_1.0.0
## [3] stringr_1.5.1 dplyr_1.1.4
## [5] purrr_1.0.2 readr_2.1.5
## [7] tidyr_1.3.1 tibble_3.2.1
## [9] tidyverse_2.0.0 rtracklayer_1.62.0
## [11] EnhancedVolcano_1.18.0 ggrepel_0.9.6
## [13] RColorBrewer_1.1-3 pheatmap_1.0.12
## [15] hexbin_1.28.5 vsn_3.68.0
## [17] ggplot2_3.5.1 ashr_2.2-63
## [19] apeglm_1.22.1 DESeq2_1.40.2
## [21] SummarizedExperiment_1.30.2 Biobase_2.60.0
## [23] MatrixGenerics_1.12.3 matrixStats_1.4.1
## [25] GenomicRanges_1.54.1 GenomeInfoDb_1.36.4
## [27] IRanges_2.34.1 S4Vectors_0.38.2
## [29] BiocGenerics_0.46.0 genefilter_1.82.1
##
## loaded via a namespace (and not attached):
## [1] DBI_1.2.3 bitops_1.0-9 rlang_1.1.4
## [4] magrittr_2.0.3 compiler_4.3.2 RSQLite_2.3.9
## [7] png_0.1-8 vctrs_0.6.5 pkgconfig_2.0.3
## [10] crayon_1.5.3 fastmap_1.2.0 XVector_0.40.0
## [13] utf8_1.2.4 Rsamtools_2.18.0 rmarkdown_2.28
## [16] tzdb_0.4.0 preprocessCore_1.62.1 bit_4.5.0
## [19] xfun_0.48 zlibbioc_1.46.0 cachem_1.1.0
## [22] jsonlite_1.8.9 blob_1.2.4 DelayedArray_0.26.7
## [25] BiocParallel_1.34.2 irlba_2.3.5.1 parallel_4.3.2
## [28] R6_2.5.1 stringi_1.8.4 bslib_0.8.0
## [31] limma_3.56.2 SQUAREM_2021.1 jquerylib_0.1.4
## [34] numDeriv_2016.8-1.1 Rcpp_1.0.13-1 knitr_1.48
## [37] timechange_0.3.0 Matrix_1.6-5 splines_4.3.2
## [40] tidyselect_1.2.1 rstudioapi_0.17.0 abind_1.4-8
## [43] yaml_2.3.10 codetools_0.2-20 affy_1.78.2
## [46] lattice_0.22-6 plyr_1.8.9 withr_3.0.1
## [49] KEGGREST_1.40.1 coda_0.19-4.1 evaluate_1.0.1
## [52] survival_3.7-0 Biostrings_2.70.3 pillar_1.9.0
## [55] affyio_1.70.0 BiocManager_1.30.25 renv_1.0.11
## [58] generics_0.1.3 invgamma_1.1 RCurl_1.98-1.16
## [61] truncnorm_1.0-9 emdbook_1.3.13 hms_1.1.3
## [64] munsell_0.5.1 scales_1.3.0 xtable_1.8-4
## [67] glue_1.8.0 tools_4.3.2 BiocIO_1.12.0
## [70] GenomicAlignments_1.38.2 annotate_1.78.0 locfit_1.5-9.10
## [73] mvtnorm_1.3-2 XML_3.99-0.17 grid_4.3.2
## [76] bbmle_1.0.25.1 bdsmatrix_1.3-7 AnnotationDbi_1.64.1
## [79] colorspace_2.1-1 GenomeInfoDbData_1.2.10 restfulr_0.0.15
## [82] cli_3.6.3 fansi_1.0.6 mixsqp_0.3-54
## [85] S4Arrays_1.0.6 gtable_0.3.5 sass_0.4.9
## [88] digest_0.6.37 SparseArray_1.2.4 rjson_0.2.23
## [91] memoise_2.0.1 htmltools_0.5.8.1 lifecycle_1.0.4
## [94] httr_1.4.7 bit64_4.5.2 MASS_7.3-60.0.1
#set standard output directory for figures
outdir <- "../output_RNA/differential_expression"
save_ggplot <- function(plot, filename, width = 10, height = 7, units = "in", dpi = 300,bg=NULL) {
print(plot)
png_path <- file.path(outdir, paste0(filename, ".png"))
pdf_dir <- file.path(outdir, "pdf_figs")
pdf_path <- file.path(pdf_dir, paste0(filename, ".pdf"))
# Ensure the pdf_figs directory exists
if (!dir.exists(pdf_dir)) dir.create(pdf_dir, recursive = TRUE)
# Save plots
ggsave(filename = png_path, plot = plot, width = width, height = height, units = units, dpi = dpi,bg = bg)
ggsave(filename = pdf_path, plot = plot, width = width, height = height, units = units, dpi = dpi,bg = bg)
}
# Specify colors
ann_colors = list(Tissue = c(OralEpi = "palegreen3" ,Aboral = "mediumpurple1"))
Read in raw count data
counts_raw <- read.csv("../output_RNA/stringtie-GeneExt/LCM_RNA_gene_count_matrix.csv", row.names = 1) #load in data
gene_id,LCM_15,LCM_16,LCM_20,LCM_21,LCM_26,LCM_27,LCM_4,LCM_5,LCM_8,LCM_9
Read in metadata
meta <- read.csv("../data_RNA/LCM_RNA_metadata.csv") %>%
dplyr::arrange(Sample) %>%
mutate(across(c(Tissue, Fragment, Section_Date, LCM_Date), factor)) # Set variables as factors
meta$Tissue <- factor(meta$Tissue, levels = c("OralEpi","Aboral")) #we want OralEpi to be the baseline
Read in gff for gene coordinates
gff <- import("../references/Pocillopora_acuta_HIv2.genes.gff3")
gff_transcripts <- as.data.frame(gff) %>% filter(type == "transcript") %>%
select(c(seqnames,start,end,width,strand,ID)) %>%
dplyr::rename("chromosome"=seqnames, "query"=ID)
Data sanity checks!
stopifnot(all(meta$Sample %in% colnames(counts_raw))) #are all of the sample names in the metadata column names in the gene count matrix?
stopifnot(all(meta$Sample == colnames(counts_raw))) #are they the same in the same order?
ffun<-filterfun(pOverA(0.49,10)) # Keep genes expressed at 10+ counts in at least 50% of samples
counts_filt_poa <- genefilter((counts_raw), ffun) #apply filter
filtered_counts <- counts_raw[counts_filt_poa,] #keep only rows that passed filter
paste0("Number of genes after filtering: ", sum(counts_filt_poa))
## [1] "Number of genes after filtering: 14464"
write.csv(filtered_counts, file = file.path(outdir, "filtered_counts.csv"))
There are now 14464 genes in the filtered dataset.
Data sanity checks:
all(meta$Sample %in% colnames(filtered_counts)) #are all of the sample names in the metadata column names in the gene count matrix?
## [1] TRUE
all(meta$Sample == colnames(filtered_counts)) #are they the same in the same order? Should be TRUE
## [1] TRUE
Create DESeq object and run DESeq2
dds <- DESeqDataSetFromMatrix(countData = filtered_counts,
colData = meta,
design= ~ Fragment + Tissue)
dds <- DESeq(dds)
### Extract results for Aboral vs. OralEpi contrast
res <- results(dds, contrast = c("Tissue","Aboral","OralEpi"))
resLFC <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", res=res, type = "apeglm")
res <- resLFC
resOrdered <- res[order(res$pvalue),]# save differentially expressed genes
DE_05 <- as.data.frame(resOrdered) %>% filter(padj < 0.05 & abs(log2FoldChange) > 1)
DE_05_Up <- DE_05 %>% filter(log2FoldChange > 0) #Higher in Aboral, Lower in OralEpi
DE_05_Down <- DE_05 %>% filter(log2FoldChange < 0) #Lower in Aboral, Higher in OralEpi
nrow(DE_05)
## [1] 1774
nrow(DE_05_Up) #Higher in Aboral, Lower in OralEpi
## [1] 558
nrow(DE_05_Down) #Lower in Aboral, Higher in OralEpi
## [1] 1216
write.csv(as.data.frame(resOrdered),
file = file.path(outdir, "DESeq_results.csv"))
write.csv(DE_05,
file = file.path(outdir, "DEG_05.csv"))
EnhancedVolcano(res,
lab = ifelse(res$padj < 0.05, rownames(res), ""),
x = "log2FoldChange",
y = "pvalue"
)
plotMA(results(dds, contrast = c("Tissue","Aboral","OralEpi")), ylim=c(-20,20))
plotMA(res, ylim=c(-20,20))
resultsNames(dds)
## [1] "Intercept" "Fragment_B_vs_A"
## [3] "Fragment_C_vs_A" "Fragment_D_vs_A"
## [5] "Fragment_E_vs_A" "Tissue_Aboral_vs_OralEpi"
# because we are interested in the comparison and not the intercept, we set 'coef=2'
resNorm <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", type="normal")
resAsh <- lfcShrink(dds, coef="Tissue_Aboral_vs_OralEpi", type="ashr")
par(mfrow=c(1,3), mar=c(4,4,2,1))
xlim <- c(1,1e5); ylim <- c(-20,20)
plotMA(res, xlim=xlim, ylim=ylim, main="apeglm")
plotMA(resNorm, xlim=xlim, ylim=ylim, main="normal")
plotMA(resAsh, xlim=xlim, ylim=ylim, main="ashr")
plotCounts(dds, gene=which.max(res$log2FoldChange), intgroup="Tissue")
plotCounts(dds, gene=which.min(res$log2FoldChange), intgroup="Tissue")
Transforming count data for visualization
vsd <- vst(dds, blind=FALSE)
rld <- rlog(dds, blind=FALSE)
ntd <- normTransform(dds) # this gives log2(n + 1)
meanSdPlot(assay(vsd), main = "vsd")
meanSdPlot(assay(rld))
meanSdPlot(assay(ntd))
#save the vsd transformation
vsd_mat <- assay(vsd)
write.csv(vsd_mat, file = file.path(outdir, "vsd_expression_matrix.csv"))
Will move forward with vst transformation for visualizations
heatmap_metadata <- as.data.frame(colData(dds)[,c("Tissue","Fragment")])
#view all genes
pheatmap(assay(vsd), cluster_rows=TRUE, show_rownames=FALSE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors,color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
#view highest count genes
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:20]
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
#view most significantly differentially expressed genes
select <- order(res$padj)[1:20]
pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2, annotation_col=(heatmap_metadata%>% select(Tissue)),
annotation_colors = ann_colors,color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200))
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$Tissue, vsd$Fragment, sep="-")
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows=sampleDists,
clustering_distance_cols=sampleDists,
col=colors)
pcaData <- plotPCA(vsd, intgroup=c("Tissue", "Fragment"), returnData=TRUE, ntop = 14464)
percentVar <- round(100 * attr(pcaData, "percentVar"))
PCA <- ggplot(pcaData, aes(PC1, PC2, color=Tissue, shape=Fragment)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
save_ggplot(PCA, "PCA_allgenes")
PCA_small <- ggplot(pcaData, aes(PC1, PC2, color=Tissue)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
ggsave(filename = paste0(outdir,"/PCA_allgenes_small", ".png"), plot = PCA_small, width = 4, height = 2.5, units = "in", dpi = 300)
pcaData <- plotPCA(vsd, intgroup=c("Tissue", "Fragment"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
PCA <- ggplot(pcaData, aes(PC1, PC2, color=Tissue, shape=Fragment)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
save_ggplot(PCA, "PCA")
PCA_small <- ggplot(pcaData, aes(PC1, PC2, color=Tissue)) +
geom_point(size=2) +
scale_color_manual(values = c("Aboral" = "mediumpurple1", "OralEpi" = "palegreen3"))+
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_bw()
ggsave(filename = paste0(outdir,"/PCA_small", ".png"), plot = PCA_small, width = 4, height = 2.5, units = "in", dpi = 300)
Clearly, the majority of the variance in the data is explained by tissue type!
Download annotation files from genome website
# wget files
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.Conserved_Domain_Search_results.txt.gz
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.EggNog_results.txt.gz
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.KEGG_results.txt.gz
# move to references direcotry
mv *gz ../references
# unzip files
gunzip ../references/*gz
EggNog <- read.delim("../references/Pocillopora_acuta_HIv2.genes.EggNog_results.txt") %>% dplyr::rename("query" = X.query)
CDSearch <- read.delim("../references/Pocillopora_acuta_HIv2.genes.Conserved_Domain_Search_results.txt", quote = "") %>% dplyr::rename("query" = X.Query)
KEGG <- read.delim("../references/Pocillopora_acuta_HIv2.genes.KEGG_results.txt", header = FALSE) %>% dplyr::rename("query" = V1, "KeggTerm" = V2)
DE_05$query <- rownames(DE_05)
DE_05_annot <- DE_05 %>% left_join(CDSearch) %>% select(query,everything())
DE_05_eggnog <- DE_05 %>% left_join(EggNog) %>% select(query,everything())
write.csv(as.data.frame(DE_05_eggnog), file=paste0(outdir,"/DE_05_eggnog_annotation.csv"))
annot_all <- as.data.frame(rownames(dds)) %>% dplyr::rename("query" = `rownames(dds)`) %>% left_join(CDSearch)
eggnog_all <- as.data.frame(rownames(dds)) %>% dplyr::rename("query" = `rownames(dds)`) %>% left_join(EggNog)
gene_labels <- eggnog_all %>% select(query,PFAMs) %>%
mutate_all(~ ifelse(is.na(.), "", .)) %>% #replace NAs with "" for labelling purposes
separate(PFAMs, into = c("PFAMs", "rest of name"), sep = ",(?=.*?,)", extra = "merge")
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
top50_DE
save_ggplot(top50_DE, "top50_DE")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[select,"PFAMs"], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi")
MarkerGenes <- read.csv("../references/Pacuta_MarkerGenes_Levy2021.csv") %>% dplyr::rename("query" = 1, "List" = 2, "definition" = 3) %>% filter(List !="Toolkit")
MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/Pacuta_Spis_Markers_pairs.csv") %>% select(protein_id_spB,cluster,Standardized_Name_spA ) %>% dplyr::rename("query" = 1, "List" = 2)
MarkerGenes$def_short <- ifelse(nchar(MarkerGenes$definition) > 20,
paste0(substr(MarkerGenes$definition, 1, 17), "..."),
MarkerGenes$definition)
Pcomp_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Pcomp_markers.csv") %>% select(protein_id_spA,cluster_guess_std,protein_id_spB) %>% dplyr::rename("Pcomp_gene" = protein_id_spA,"query" = protein_id_spB, "List" = cluster_guess_std)
Pcomp_MarkerGenes_broc <- Pcomp_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Pcomp_MarkerGenes_broc_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>% select(query,everything()) %>% left_join(Pcomp_MarkerGenes_broc)
Pcomp_MarkerGenes_broc_all_res_Pcomp <- Pcomp_MarkerGenes_broc_all_res %>%
group_by(Pcomp_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Pcomp_MarkerGenes_broc_all_res_Pacuta <- Pcomp_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Pcomp_MarkerGenes_broc_all_res_Pcomp, "../output_RNA/differential_expression/Pcomp_snRNA_marker_expr_PacutaLCM.csv")
Pdam_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Pdam_markers.csv") %>% select(protein_id_spA,Standardized_Name_spA,protein_id_spB) %>% dplyr::rename("Pdam_gene" = protein_id_spA,"query" = protein_id_spB, "List" = Standardized_Name_spA)
Pdam_MarkerGenes_broc <- Pdam_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Pdam_MarkerGenes_broc_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>%
select(query,everything()) %>% left_join(Pdam_MarkerGenes_broc)
Pdam_MarkerGenes_broc_all_res_Pdam <- Pdam_MarkerGenes_broc_all_res %>%
group_by(Pdam_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Pdam_MarkerGenes_broc_all_res_Pacuta <- Pdam_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Pdam_MarkerGenes_broc_all_res_Pdam, "../output_RNA/differential_expression/Pdam_snRNA_marker_expr_PacutaLCM.csv")
Mcap_MarkerGenes_broc <- read.csv("../output_RNA/marker_genes/combined_Mcap_markers.csv") %>% select(protein_id_spA,cluster_guess_std,protein_id_spB) %>% dplyr::rename("Mcap_gene" = protein_id_spA,"query" = protein_id_spB, "List" = cluster_guess_std)
Mcap_MarkerGenes_broc <- Mcap_MarkerGenes_broc %>% filter(grepl("Pocillopora",query)&!grepl("unidentified",List)) %>% distinct()
Mcap_MarkerGenes_broc_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>%
select(query,everything()) %>% left_join(Mcap_MarkerGenes_broc)
Mcap_MarkerGenes_broc_all_res_Mcap <- Mcap_MarkerGenes_broc_all_res %>%
group_by(Mcap_gene) %>%
slice_min(order_by = padj, with_ties = FALSE) %>%
ungroup()
Mcap_MarkerGenes_broc_all_res_Pacuta <- Mcap_MarkerGenes_broc_all_res %>% group_by(query,baseMean,log2FoldChange,lfcSE,pvalue,padj) %>% summarize(List = paste(List, collapse = ";"), .groups="drop")
write.csv(Mcap_MarkerGenes_broc_all_res_Mcap, "../output_RNA/differential_expression/Mcap_snRNA_marker_expr_PacutaLCM.csv")
Biomin <- read.csv("../output_RNA/marker_genes/Pacuta_Biomin_Blast.csv") %>% dplyr::rename("query" = Pocillopora_acuta_best_hit) %>% select(-c(accessionnumber.geneID, Ref))
Biomin_broc <- read.csv("../output_RNA/marker_genes/Pacuta_Biomin_Spis_ortholog.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X,accessionnumber_gene_id, ref))
Biomin <- Biomin %>%
group_by(query,List) %>%
summarize(definition = paste(unique(definition), collapse = ","))
Biomin$def_short <- ifelse(nchar(Biomin$definition) > 40,
paste0(substr(Biomin$definition, 1, 37), "..."),
Biomin$definition)
Biomin_filtered_counts <- filtered_counts[(rownames(filtered_counts) %in% Biomin$query),]
Biomin_broc <- Biomin_broc %>%
group_by(query,List) %>%
summarize(definition = paste(unique(definition), collapse = ","))
Biomin_broc$def_short <- ifelse(nchar(Biomin_broc$definition) > 40,
paste0(substr(Biomin_broc$definition, 1, 37), "..."),
Biomin_broc$definition)
Biomin_broc_filtered_counts <- filtered_counts[(rownames(filtered_counts) %in% Biomin_broc$query),]
write.csv(Biomin_filtered_counts, "../output_RNA/differential_expression/Biomin_filtered_counts.csv")
He_etal_Nvec = He, S., Shao, W., Chen, S. (Cynthia), Wang, T. and Gibson, M. C. (2023). Spatial transcriptomics reveals a cnidarian segment polarity program in Nematostella vectensis. Current Biology 33, 2678-2689.e5.
DuBuc_etal_Nvec = DuBuc, T. Q., Stephenson, T. B., Rock, A. Q. and Martindale, M. Q. (2018). Hox and Wnt pattern the primary body axis of an anthozoan cnidarian before gastrulation. Nat Commun 9, 2007.
He_etal_Nvec <- read.csv("../output_RNA/marker_genes/He_etal_nematostella.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
He_etal_Nvec$def_short <- gsub("Homeobox protein", "Hox", He_etal_Nvec$Description, ignore.case = TRUE)
DuBuc_etal_Nvec <- read.csv("../output_RNA/marker_genes/Wnt_nematostella.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
DuBuc_etal_Nvec$def_short <- DuBuc_etal_Nvec$Gene_Name
HeatStressGenes <- read.csv("../output_RNA/marker_genes/HeatStressGenes_Pacuta.csv") %>% dplyr::rename("query" = Pacuta_gene) %>% select(-c(X))
DE_05$query <- rownames(DE_05)
resOrdered$query <- rownames(resOrdered)
join_genes_of_interest <- function(df, gene_set) {
df %>%
left_join(gene_set, by = "query") %>%
select(query, everything()) %>%
drop_na() %>% distinct()
}
DE_05_biomin <- join_genes_of_interest(DE_05, Biomin)
DE_05_Biomin_broc <- join_genes_of_interest(DE_05, Biomin_broc)
DE_05_marker <- join_genes_of_interest(DE_05, MarkerGenes)
DE_05_marker_broc <- join_genes_of_interest(DE_05, MarkerGenes_broc)
DE_05_He_etal <- join_genes_of_interest(DE_05, He_etal_Nvec)
DE_05_DuBuc_etal <- join_genes_of_interest(DE_05, DuBuc_etal_Nvec)
DESeq_biomin <- join_genes_of_interest(as.data.frame(resOrdered), Biomin)
DESeq_Biomin_broc <- join_genes_of_interest(as.data.frame(resOrdered), Biomin_broc)
DESeq_marker <- join_genes_of_interest(as.data.frame(resOrdered), MarkerGenes)
DESeq_marker_broc <- join_genes_of_interest(as.data.frame(resOrdered), MarkerGenes_broc)
DESeq_He_etal <- join_genes_of_interest(as.data.frame(resOrdered), He_etal_Nvec)
DESeq_DuBuc_etal <- join_genes_of_interest(as.data.frame(resOrdered), DuBuc_etal_Nvec)
DE_05_HeatStressGenes <- HeatStressGenes %>% left_join(DE_05, by="query") %>% select(query,everything()) %>% drop_na(baseMean)
DESeq_HeatStressGenes <- HeatStressGenes %>% left_join(as.data.frame(resOrdered), by="query") %>% select(query,everything()) %>% drop_na(baseMean)
write.csv(as.data.frame(DE_05_biomin), file=paste0(outdir,"/DE_05_biomin_annotation.csv"))
write.csv(as.data.frame(DE_05_marker), file=paste0(outdir,"/DE_05_markergene_annotation.csv"))
biomin_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(Biomin)
biomin_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>% select(query,everything()) %>% left_join(Biomin)
Biomin_broc_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(Biomin_broc)
Biomin_broc_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>% select(query,everything()) %>% left_join(Biomin_broc)
markers_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(MarkerGenes)
markers_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>% select(query,everything()) %>% left_join(MarkerGenes)
broc_markers_all_counts <- as.data.frame(counts(dds)) %>% mutate(query = rownames(dds)) %>% select(query,everything()) %>% left_join(MarkerGenes_broc)
broc_markers_all_res <- as.data.frame(res) %>% mutate(query = rownames(res)) %>% select(query,everything()) %>% left_join(MarkerGenes_broc)
#view biomin genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_biomin$query, ]), center = TRUE, scale = TRUE))
DE_biomin <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_biomin$def_short, fontsize_row = 5)
DE_biomin
save_ggplot(DE_biomin, "DE_biomin")
#view biomin genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_Biomin_broc$query, ]), center = TRUE, scale = TRUE))
DE_Biomin_broc <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_Biomin_broc$def_short, fontsize_row = 5)
DE_Biomin_broc
save_ggplot(DE_Biomin_broc, "DE_Biomin_broc")
#view marker genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_marker$query, ]), center = TRUE, scale = TRUE))
DE_marker <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,cutree_rows = 5,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker$List, fontsize_row = 4)
DE_marker
save_ggplot(DE_marker, "DE_marker")
DE_05_marker_grouped <- DE_05_marker %>% arrange(List) %>% mutate(List = as.factor(List))
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_grouped$quer, ]), center = TRUE, scale = TRUE))
DE_05_marker_grouped_plot <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_grouped$List, fontsize_row = 5)
DE_05_marker_grouped_plot
save_ggplot(DE_05_marker_grouped_plot, "DE_05_marker_grouped")
#view marker genes that are differentially expressed
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_broc$query, ]), center = TRUE, scale = TRUE))
DE_marker <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,cutree_rows = 5,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_broc$List, fontsize_row = 2)
DE_marker
save_ggplot(DE_marker, "DE_marker_broc")
DE_05_marker_broc_grouped <- DE_05_marker_broc %>% arrange(List) %>% mutate(List = as.factor(List))
z_scores <- t(scale(t(assay(vsd)[DE_05_marker_broc_grouped$quer, ]), center = TRUE, scale = TRUE))
DE_05_marker_broc_grouped_plot <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = DE_05_marker_broc_grouped$List, fontsize_row = 3)
DE_05_marker_broc_grouped_plot
save_ggplot(DE_05_marker_broc_grouped_plot, "DE_05_marker_broc_grouped")
Biomin_volcano <- EnhancedVolcano(biomin_all_res,
lab = biomin_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 40)
save_ggplot(Biomin_volcano, "Biomin_volcano")
Biomin_broc_volcano <- EnhancedVolcano(Biomin_broc_all_res,
lab = Biomin_broc_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 40)
save_ggplot(Biomin_broc_volcano, "Biomin_broc_volcano")
Marker_volcano <- EnhancedVolcano(markers_all_res,
lab = markers_all_res$List,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano, "Marker_volcano")
Marker_volcano_names <- EnhancedVolcano(markers_all_res,
lab = markers_all_res$def_short,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano_names, "Marker_volcano_names")
Marker_volcano <- EnhancedVolcano(broc_markers_all_res,
lab = broc_markers_all_res$List,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.01,
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(Marker_volcano, "Marker_volcano_broc")
EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue')
volcano_plain <- EnhancedVolcano(res,
lab = NA,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
title="",
subtitle="",
drawConnectors = TRUE,
widthConnectors = 0.75,
pointSize = 1,
labSize = 2,boxedLabels = TRUE,max.overlaps = 60)
save_ggplot(volcano_plain, "volcano_plain",width = 4, height = 6, units = "in", dpi = 300)
# wget protein sequence reference file
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
# move to references direcotry
mv *gz ../references
# unzip files
gunzip ../references/*gz
#get the names of all the DEGs from the first column of the DEG csv file
tail -n +2 ../output_RNA/differential_expression/DEG_05.csv | cut -d',' -f1 | tr -d '"' > ../output_RNA/differential_expression/DEG_05_names.csv
#grep this file against the protein fasta file, first with wc -l to make sure the number of lines is correct (should be your number of DEGs)
grep -f ../output_RNA/differential_expression/DEG_05_names.csv ../references/Pocillopora_acuta_HIv2.genes.pep.faa | wc -l
#grep each header with the protein sequence after ("-A 1") and save to a new file
grep -A 1 -f ../output_RNA/differential_expression/DEG_05_names.csv ../references/Pocillopora_acuta_HIv2.genes.pep.faa > ../output_RNA/differential_expression/DEG_05_seqs.txt
On andromeda:
Blastp-ing only the DE genes against the entire nr database (will take a while)
cd ../scripts
nano DEG_05_blast.sh
#!/bin/bash
#SBATCH --job-name="DE_blast"
#SBATCH -t 240:00:00
#SBATCH --export=NONE
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH --mail-user=zdellaert@uri.edu #your email to send notifications
#SBATCH --mem=500GB
#SBATCH --error=../scripts/outs_errs/"%x_error.%j" #write out slurm error reports
#SBATCH --output=../scripts/outs_errs/"%x_output.%j" #write out any program outpus
#SBATCH --account=putnamlab
#SBATCH --nodes=2 --ntasks-per-node=24
module load BLAST+/2.15.0-gompi-2023a
cd ../output_RNA/differential_expression #set working directory
mkdir blast
cd blast
#nr database location andromeda: /data/shared/ncbi-db/.ncbirc
# points to current location: cat /data/shared/ncbi-db/.ncbirc
# [BLAST]
# BLASTDB=/data/shared/ncbi-db/2024-11-10
blastp -query ../DEG_05_seqs.txt -db nr -out DEG_05_blast_results.txt -outfmt 0 -evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 10
blastp -query ../DEG_05_seqs.txt -db nr -out DEG_05_blast_results_tab.txt -outfmt 6 -evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 1 \
-max_hsps 1
echo "Blast complete!" $(date)
sbatch DEG_05_blast.sh
cd ../output_RNA/differential_expression/blast
wc -l DEG_05_blast_results_tab.txt #3537 of the 3606 genes were annotated
#get just the NCBI database accession numbers for the blast results
cut -f2 DEG_05_blast_results_tab.txt > DEG_05_blast_accessions.txt
#remove any duplicates
sort -u DEG_05_blast_accessions.txt > unique_DEG_05_blast_accessions.txt
wc -l unique_DEG_05_blast_accessions.txt #3404 of the 3537 annotations were unique
while read acc; do
curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=$acc&rettype=gp&retmode=text" \
| grep "DEFINITION" | sed 's/DEFINITION //g' | awk -v id="$acc" '{print id "\t" $0}'
done < unique_DEG_05_blast_accessions.txt > DEG_05_blast_names.txt
wc -l DEG_05_blast_names.txt #3396 ; unsure why 8 are missing.
join -1 2 -2 1 -t $'\t' <(sort -k2 DEG_05_blast_results_tab.txt) <(sort DEG_05_blast_names.txt) > annotated_DEG_05_blast_results_tab.txt
On unity:
swissprot based on https://github.com/urol-e5/deep-dive/blob/main/D-Apul/code/20-Apul-gene-annotation.Rmd and https://github.com/urol-e5/deep-dive/blob/main/F-Pmea/code/20-Pmea-gene-annotation.Rmd
Steven’s notebook post here: https://sr320.github.io/tumbling-oysters/posts/sr320-27-go/
mkdir ../references/blast_dbs
cd ../references/blast_dbs
curl -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz
mv uniprot_sprot.fasta.gz uniprot_sprot_r2024_10_02.fasta.gz
gunzip -k uniprot_sprot_r2024_10_02.fasta.gz
rm uniprot_sprot_r2024_10_02.fasta.gz
head uniprot_sprot_r2024_10_02.fasta
echo "Number of Sequences"
grep -c ">" uniprot_sprot_r2024_10_02.fasta
# 572214 sequences
module load blast-plus/2.14.1
makeblastdb \
-in ../references/blast_dbs/uniprot_sprot_r2024_10_02.fasta \
-dbtype prot \
-out ../references/blast_dbs/uniprot_sprot_r2024_10_02
cd ../scripts
nano blastp_SwissProt.sh
#!/bin/bash
#SBATCH -t 18:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=48
#SBATCH --mem=500GB
#SBATCH --export=NONE
#SBATCH --error=../scripts/outs_errs/"%x_error.%j" #write out slurm error reports
#SBATCH --output=../scripts/outs_errs/"%x_output.%j" #write out any program outpus
#SBATCH --mail-type=BEGIN,END,FAIL #email you when job starts, stops and/or fails
#SBATCH -D /project/pi_hputnam_uri_edu/zdellaert/LaserCoral #set working directory
module load blast-plus/2.14.1
cd references/
mkdir annotation
fasta="Pocillopora_acuta_HIv2.genes.pep.faa"
blastp \
-query $fasta \
-db blast_dbs/uniprot_sprot_r2024_10_02 \
-out annotation/blastp_SwissProt_out.tab \
-evalue 1E-05 \
-num_threads 48 \
-max_target_seqs 1 \
-max_hsps 1 \
-outfmt 6
echo "Blast complete!" $(date)
cd references/annotation/
tr '|' '\t' < blastp_SwissProt_out.tab > blastp_SwissProt_out_sep.tab
cd ../references/annotation/
curl -H "Accept: text/plain; format=tsv" "https://rest.uniprot.org/uniprotkb/stream?fields=accession%2Creviewed%2Cid%2Cprotein_name%2Cgene_names%2Corganism_name%2Clength%2Cgo_p%2Cgo%2Cgo_id%2Cgo_c%2Cgo_f&format=tsv&query=%28reviewed%3Atrue%29" -o SwissProt-Annot-GO_111524.tsv
wc -l SwissProt-Annot-GO_111524.tsv
#572215
All code below based on https://github.com/urol-e5/deep-dive/blob/main/D-Apul/code/20-Apul-gene-annotation.Rmd and https://github.com/urol-e5/deep-dive/blob/main/F-Pmea/code/20-Pmea-gene-annotation.Rmd
Steven’s notebook post here: https://sr320.github.io/tumbling-oysters/posts/sr320-27-go/
bltabl <- read.csv("../references/annotation/blastp_SwissProt_out_sep.tab", sep = '\t', header = FALSE)
spgo <- read.csv("../references/annotation/SwissProt-Annot-GO_111524.tsv", sep = '\t', header = TRUE)
annot_tab <- left_join(bltabl, spgo, by = c("V3" = "Entry")) %>%
select(
query = V1,
blast_hit = V3,
evalue = V13,
ProteinNames = Protein.names,
BiologicalProcess = Gene.Ontology..biological.process.,
GeneOntologyIDs = Gene.Ontology.IDs,
CellularComponent = Gene.Ontology..cellular.component.,
MolecularFunction = Gene.Ontology..molecular.function.,
)
head(annot_tab)
## query blast_hit evalue
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a Q4JAI4 1.02e-37
## 2 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 O08807 9.62e-116
## 3 Pocillopora_acuta_HIv2___RNAseq.g7985.t1 O74212 3.56e-158
## 4 Pocillopora_acuta_HIv2___TS.g15308.t1 Q09575 1.08e-12
## 5 Pocillopora_acuta_HIv2___RNAseq.g2057.t1 P0C1P0 8.81e-14
## 6 Pocillopora_acuta_HIv2___RNAseq.g4696.t1 Q9W2Q5 8.98e-69
## ProteinNames
## 1 Methionine synthase (EC 2.1.1.-) (Homocysteine methyltransferase)
## 2 Peroxiredoxin-4 (EC 1.11.1.24) (Antioxidant enzyme AOE372) (Peroxiredoxin IV) (Prx-IV) (Thioredoxin peroxidase AO372) (Thioredoxin-dependent peroxide reductase A0372) (Thioredoxin-dependent peroxiredoxin 4)
## 3 Acyl-lipid (8-3)-desaturase (EC 1.14.19.30) (Delta(5) fatty acid desaturase) (Delta-5 fatty acid desaturase)
## 4 Uncharacterized protein K02A2.6
## 5 Phosphatidylinositol N-acetylglucosaminyltransferase subunit Y (Phosphatidylinositol-glycan biosynthesis class Y protein) (PIG-Y)
## 6 Calcium and integrin-binding family member 2
## BiologicalProcess
## 1 methionine biosynthetic process [GO:0009086]; methylation [GO:0032259]
## 2 cell redox homeostasis [GO:0045454]; extracellular matrix organization [GO:0030198]; hydrogen peroxide catabolic process [GO:0042744]; male gonad development [GO:0008584]; negative regulation of male germ cell proliferation [GO:2000255]; protein maturation by protein folding [GO:0022417]; reactive oxygen species metabolic process [GO:0072593]; response to oxidative stress [GO:0006979]; spermatogenesis [GO:0007283]
## 3 long-chain fatty acid biosynthetic process [GO:0042759]; unsaturated fatty acid biosynthetic process [GO:0006636]
## 4 DNA integration [GO:0015074]
## 5 GPI anchor biosynthetic process [GO:0006506]
## 6 calcium ion homeostasis [GO:0055074]; phototransduction [GO:0007602]
## GeneOntologyIDs
## 1 GO:0003871; GO:0008270; GO:0009086; GO:0032259
## 2 GO:0005615; GO:0005737; GO:0005739; GO:0005783; GO:0005790; GO:0005829; GO:0006979; GO:0007283; GO:0008379; GO:0008584; GO:0022417; GO:0030198; GO:0042744; GO:0042802; GO:0045454; GO:0072593; GO:0140313; GO:2000255
## 3 GO:0006636; GO:0016020; GO:0020037; GO:0042759; GO:0046872; GO:0102866
## 4 GO:0003676; GO:0005737; GO:0008270; GO:0015074; GO:0019899; GO:0042575
## 5 GO:0000506; GO:0006506
## 6 GO:0000287; GO:0005509; GO:0005737; GO:0007602; GO:0055074
## CellularComponent
## 1
## 2 cytoplasm [GO:0005737]; cytosol [GO:0005829]; endoplasmic reticulum [GO:0005783]; extracellular space [GO:0005615]; mitochondrion [GO:0005739]; smooth endoplasmic reticulum [GO:0005790]
## 3 membrane [GO:0016020]
## 4 cytoplasm [GO:0005737]; DNA polymerase complex [GO:0042575]
## 5 glycosylphosphatidylinositol-N-acetylglucosaminyltransferase (GPI-GnT) complex [GO:0000506]
## 6 cytoplasm [GO:0005737]
## MolecularFunction
## 1 5-methyltetrahydropteroyltriglutamate-homocysteine S-methyltransferase activity [GO:0003871]; zinc ion binding [GO:0008270]
## 2 identical protein binding [GO:0042802]; molecular sequestering activity [GO:0140313]; thioredoxin peroxidase activity [GO:0008379]
## 3 di-homo-gamma-linolenate delta5 desaturase activity [GO:0102866]; heme binding [GO:0020037]; metal ion binding [GO:0046872]
## 4 enzyme binding [GO:0019899]; nucleic acid binding [GO:0003676]; zinc ion binding [GO:0008270]
## 5
## 6 calcium ion binding [GO:0005509]; magnesium ion binding [GO:0000287]
write.table(annot_tab,
file = "../references/annotation/protein-GO.tsv",
sep = "\t",
row.names = FALSE,
quote = FALSE)
DESeq_SwissProt <- as.data.frame(resOrdered) %>% left_join(annot_tab) %>% select(query,everything())
DE_05_SwissProt <- DE_05 %>% left_join(annot_tab) %>% select(query,everything())
write.csv(as.data.frame(DE_05_SwissProt), file=paste0(outdir,"/DE_05_SwissProt_annotation.csv"))
DE_05_SwissProt$short_name <- ifelse(nchar(DE_05_SwissProt$ProteinNames) > 30,
paste0(substr(DE_05_SwissProt$ProteinNames, 1, 27), "..."),
DE_05_SwissProt$ProteinNames)
gene_labels <- DE_05_SwissProt %>%
select(query,short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1"
## [2] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [5] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [8] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g3832.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g25431.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g9588.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [18] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g16202.t1"
## [20] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [21] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g26604.t1"
## [24] "Pocillopora_acuta_HIv2___TS.g18100.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g25327.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g19085.t1"
## [30] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [31] "Pocillopora_acuta_HIv2___TS.g11360.t1"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [33] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [37] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g21374.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g5252.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g22853.t1"
## [42] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [44] "Pocillopora_acuta_HIv2___TS.g16802.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [48] "Pocillopora_acuta_HIv2___RNAseq.g26594.t2"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g19115.t1"
## [50] "Pocillopora_acuta_HIv2___TS.g4983.t1"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_DE_SwissProt")
#view most significantly differentially expressed genes by LFC
select <- order(abs(res$log2FoldChange),decreasing = TRUE)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [2] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [5] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [8] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g22261.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [18] "Pocillopora_acuta_HIv2___TS.g4983.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [20] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [21] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [24] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [30] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [31] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [33] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g9631.t1"
## [37] "Pocillopora_acuta_HIv2___RNAseq.g7627.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g8062.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g1102.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g14484.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g21373.t1"
## [42] "Pocillopora_acuta_HIv2___TS.g16384.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g13561.t1"
## [44] "Pocillopora_acuta_HIv2___RNAseq.g1126.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g9210.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.g24649.t1"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g11659.t1"
## [48] "Pocillopora_acuta_HIv2___TS.g25577.t1a"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g27681.t1b"
## [50] "Pocillopora_acuta_HIv2___TS.g1968.t2"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_LFC_DE_SwissProt")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_SwissProt")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_SwissProt")
DE_05_SwissProt$short_GO <- ifelse(nchar(DE_05_SwissProt$BiologicalProcess) > 30,
paste0(substr(DE_05_SwissProt$BiologicalProcess, 1, 27), "..."),
DE_05_SwissProt$BiologicalProcess)
gene_labels <- DE_05_SwissProt %>% select(query,short_GO) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
top50_DE
save_ggplot(top50_DE, "top50_DE_Blast2GO")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_Blast2GO")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_Blast2GO")
Manual <- read.csv(paste0(outdir,"/DE_05_Manual_annotation.csv")) %>% arrange(X)
Manual <- Manual %>% filter(abs(log2FoldChange) > 1)
Manual$Heatmap_Label <- gsub("Homeobox protein", "Hox", Manual$Heatmap_Label, ignore.case = TRUE)
Manual$Heatmap_Label <- gsub("Protein Wnt", "Wnt", Manual$Heatmap_Label, ignore.case = TRUE)
gene_labels <- Manual %>%
select(query,Heatmap_Label) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
#view most significantly differentially expressed genes
select <- order(res$padj)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g6351.t1"
## [2] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [5] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [8] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g3832.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g25431.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g9588.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [18] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g16202.t1"
## [20] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [21] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g26604.t1"
## [24] "Pocillopora_acuta_HIv2___TS.g18100.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g25327.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g19085.t1"
## [30] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [31] "Pocillopora_acuta_HIv2___TS.g11360.t1"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [33] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [37] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g21374.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g5252.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g22853.t1"
## [42] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [44] "Pocillopora_acuta_HIv2___TS.g16802.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [48] "Pocillopora_acuta_HIv2___RNAseq.g26594.t2"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g19115.t1"
## [50] "Pocillopora_acuta_HIv2___TS.g4983.t1"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_DE_Manual")
#view most significantly differentially expressed genes by LFC
select <- order(abs(res$log2FoldChange),decreasing = TRUE)[1:50]
rownames(res)[select]
## [1] "Pocillopora_acuta_HIv2___RNAseq.g28575.t1a"
## [2] "Pocillopora_acuta_HIv2___TS.g18104.t1"
## [3] "Pocillopora_acuta_HIv2___RNAseq.g27038.t1"
## [4] "Pocillopora_acuta_HIv2___RNAseq.g14025.t1"
## [5] "Pocillopora_acuta_HIv2___RNAseq.g2165.t1"
## [6] "Pocillopora_acuta_HIv2___RNAseq.g14330.t3"
## [7] "Pocillopora_acuta_HIv2___RNAseq.g14090.t1"
## [8] "Pocillopora_acuta_HIv2___TS.g30765.t1"
## [9] "Pocillopora_acuta_HIv2___RNAseq.g8006.t1"
## [10] "Pocillopora_acuta_HIv2___RNAseq.g10378.t1"
## [11] "Pocillopora_acuta_HIv2___RNAseq.g14253.t1"
## [12] "Pocillopora_acuta_HIv2___RNAseq.11056_t"
## [13] "Pocillopora_acuta_HIv2___RNAseq.g22261.t1"
## [14] "Pocillopora_acuta_HIv2___RNAseq.g14021.t1"
## [15] "Pocillopora_acuta_HIv2___RNAseq.g19082.t1"
## [16] "Pocillopora_acuta_HIv2___TS.g19991.t2"
## [17] "Pocillopora_acuta_HIv2___RNAseq.g19284.t1"
## [18] "Pocillopora_acuta_HIv2___TS.g4983.t1"
## [19] "Pocillopora_acuta_HIv2___RNAseq.g21000.t1"
## [20] "Pocillopora_acuta_HIv2___RNAseq.g20860.t1"
## [21] "Pocillopora_acuta_HIv2___TS.g9414.t1"
## [22] "Pocillopora_acuta_HIv2___RNAseq.g8588.t1"
## [23] "Pocillopora_acuta_HIv2___RNAseq.g11588.t1"
## [24] "Pocillopora_acuta_HIv2___RNAseq.g12281.t1"
## [25] "Pocillopora_acuta_HIv2___TS.g27642.t1b"
## [26] "Pocillopora_acuta_HIv2___RNAseq.g7803.t1"
## [27] "Pocillopora_acuta_HIv2___RNAseq.g8119.t1"
## [28] "Pocillopora_acuta_HIv2___RNAseq.g17117.t1"
## [29] "Pocillopora_acuta_HIv2___RNAseq.g22978.t3b"
## [30] "Pocillopora_acuta_HIv2___RNAseq.30415_t"
## [31] "Pocillopora_acuta_HIv2___TS.g16008.t2"
## [32] "Pocillopora_acuta_HIv2___RNAseq.g2406.t1"
## [33] "Pocillopora_acuta_HIv2___RNAseq.g25759.t1"
## [34] "Pocillopora_acuta_HIv2___RNAseq.g24120.t1"
## [35] "Pocillopora_acuta_HIv2___RNAseq.g14336.t1"
## [36] "Pocillopora_acuta_HIv2___RNAseq.g9631.t1"
## [37] "Pocillopora_acuta_HIv2___RNAseq.g7627.t1"
## [38] "Pocillopora_acuta_HIv2___RNAseq.g8062.t1"
## [39] "Pocillopora_acuta_HIv2___RNAseq.g1102.t1"
## [40] "Pocillopora_acuta_HIv2___RNAseq.g14484.t1"
## [41] "Pocillopora_acuta_HIv2___RNAseq.g21373.t1"
## [42] "Pocillopora_acuta_HIv2___TS.g16384.t1"
## [43] "Pocillopora_acuta_HIv2___RNAseq.g13561.t1"
## [44] "Pocillopora_acuta_HIv2___RNAseq.g1126.t1"
## [45] "Pocillopora_acuta_HIv2___RNAseq.g9210.t1"
## [46] "Pocillopora_acuta_HIv2___RNAseq.g24649.t1"
## [47] "Pocillopora_acuta_HIv2___RNAseq.g11659.t1"
## [48] "Pocillopora_acuta_HIv2___TS.g25577.t1a"
## [49] "Pocillopora_acuta_HIv2___RNAseq.g27681.t1b"
## [50] "Pocillopora_acuta_HIv2___TS.g1968.t2"
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "top50_LFC_DE_Manual")
#view genes Higher in Aboral, Lower in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange,decreasing = TRUE)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_Aboral <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), labels_col = NA, annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 8.5)
up_Aboral
save_ggplot(up_Aboral, "up_Aboral_Manual")
#view genes Lower in Aboral, Higher in OralEpi, ordered by log2FoldChange
select <- order(res$log2FoldChange)[1:50]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
up_OralEpi <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), labels_col = NA, annotation_colors = ann_colors,
labels_row =gene_labels[match(rownames(res)[select],(gene_labels$query)),2], fontsize_row = 8.5)
up_OralEpi
save_ggplot(up_OralEpi, "up_OralEpi_Manual")
cd ../references
#download the genome protein fasta if you have not already
wget http://cyanophora.rutgers.edu/Pocillopora_acuta/Pocillopora_acuta_HIv2.genes.pep.faa.gz
#unzip file
gunzip Pocillopora_acuta_HIv2.genes.pep.faa.gz
In unity
salloc -p cpu -c 8 --mem 32G
module load uri/main
module load BLAST+/2.15.0-gompi-2023a
#make blast_dbs directory if you haven't done so above
mkdir blast_dbs
cd blast_dbs
makeblastdb -in ../Pocillopora_acuta_HIv2.genes.pep.faa -out Pacuta_prot -dbtype prot
cd ../../output_RNA/differential_expression/
#make blast output directory if you haven't done so above
mkdir blast
cd blast
nano YinYang.txt
add accession numbers of interest:
XP_048585772.1
XP_048585773.1
XP_048585774.1
NP_996806.2
NP_777560.2
# Read the input file line by line and fetch FASTA sequences
while read -r accession; do
if [[ -n "$accession" ]]; then
echo "Fetching $accession..."
curl -s "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=protein&id=${accession}&rettype=fasta&retmode=text" >> "YinYang.fasta"
echo >> "YinYang.fasta" # Add a newline between sequences
sleep 1 # Avoid hitting rate limits
fi
done < "YinYang.txt"
# run blast with human-readable output
blastp -query YinYang.fasta -db ../../../references/blast_dbs/Pacuta_prot -out YinYang_blast_results.txt -outfmt 0
#looks like there are a lot of matches for each gene! I am going to do a tab search with a very low e-value cutoff:
# run blast with tabular output
blastp -query YinYang.fasta -db ../../../references/blast_dbs/Pacuta_prot -out YinYang_blast_results_tab.txt -outfmt 6 -evalue 1e-25
Great! Interestingly, the same gene, Pocillopora_acuta_HIv2_RNAseq.g25242.t1 is the top match for all 5 proteins I searched. Pocillopora_acuta_HIv2_TS.g24434.t1 is the second best match for all 5. That is interesting! Pocillopora_acuta_HIv2_RNAseq.g7583.t1 is also worth looking at. And Pocillopora_acuta_HIv2_TS.g21338.t1 matched all three YY1 isoforms.
YinYangs <- c("Pocillopora_acuta_HIv2___RNAseq.g25242.t1",
"Pocillopora_acuta_HIv2___TS.g24434.t1",
"Pocillopora_acuta_HIv2___RNAseq.g7583.t1",
"Pocillopora_acuta_HIv2___TS.g21338.t1")
for (i in 1:length(YinYangs)){
plotCounts(dds, gene=YinYangs[i], intgroup=c("Tissue"),)
}
as.data.frame(resOrdered)[YinYangs,]
## baseMean log2FoldChange lfcSE
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 2388.43282 -0.65267532 0.8125595
## Pocillopora_acuta_HIv2___TS.g24434.t1 86.14232 -0.16148954 1.0080043
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 429.08727 0.06121494 0.9629085
## Pocillopora_acuta_HIv2___TS.g21338.t1 277.12637 -0.03463576 1.0095974
## pvalue padj
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 0.1695296 0.3778825
## Pocillopora_acuta_HIv2___TS.g24434.t1 0.2933826 0.5402220
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 0.8212358 0.9266210
## Pocillopora_acuta_HIv2___TS.g21338.t1 0.2453329 0.4838418
## query
## Pocillopora_acuta_HIv2___RNAseq.g25242.t1 Pocillopora_acuta_HIv2___RNAseq.g25242.t1
## Pocillopora_acuta_HIv2___TS.g24434.t1 Pocillopora_acuta_HIv2___TS.g24434.t1
## Pocillopora_acuta_HIv2___RNAseq.g7583.t1 Pocillopora_acuta_HIv2___RNAseq.g7583.t1
## Pocillopora_acuta_HIv2___TS.g21338.t1 Pocillopora_acuta_HIv2___TS.g21338.t1
Okay! None of these genes are differentially expressed between the tissues. That is interesting and good to know. Pocillopora_acuta_HIv2___RNAseq.g25242.t1 has the highest basal expression of all the potential isoforms of this transcription factor.
DE_05_SwissProt$short_name <- ifelse(nchar(DE_05_SwissProt$ProteinNames) > 80,
paste0(substr(DE_05_SwissProt$ProteinNames, 1, 77), "..."),
DE_05_SwissProt$ProteinNames)
TRP <- Manual %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select <- TRP$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 12)
top50_DE
save_ggplot(top50_DE, "TRP_DE_SwissProt", width = 6.43, height = 4.25, units = "in", dpi = 300)
annot_tab$short_name <- ifelse(nchar(annot_tab$ProteinNames) > 80,
paste0(substr(annot_tab$ProteinNames, 1, 77), "..."),
annot_tab$ProteinNames)
swissprot_labels <- annot_tab %>%
select(query,short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
TRP <- annot_tab %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select1 <- TRP$query
select <- match(select1,rownames(vsd))
select <- select[!is.na(select)]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = swissprot_labels[match(select1,swissprot_labels$query),2], fontsize_row = 6)
top50_DE
save_ggplot(top50_DE, "TRP_SwissProt")
DE_05_biomin_filtered <- DE_05_biomin %>% left_join(Manual,by="query" ) #%>% filter(!(Heatmap_Label %in% c("Myosin-9", "Actin, cytoplasmic")))
select <- DE_05_biomin_filtered$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
DE_biomin <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
DE_biomin
save_ggplot(DE_biomin, "DE_biomin")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k as needed
# Create a dataframe to manage clusters and reordering
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate heatmap with reordered rows and labels
DE_biomin <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
show_rownames = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8
)
save_ggplot(DE_biomin, "clusters_clean/DE_biomin")
#############################
DE_05_Biomin_broc_filtered <- DE_05_Biomin_broc %>% left_join(Manual,by="query" ) %>% filter(!(Heatmap_Label %in% c("Myosin-9", "Actin, cytoplasmic")))
select <- DE_05_Biomin_broc_filtered$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
DE_Biomin_broc <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
DE_Biomin_broc
save_ggplot(DE_Biomin_broc, "DE_Biomin_broc")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k as needed
# Create a dataframe to manage clusters and reordering
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate heatmap with reordered rows and labels
DE_Biomin_broc <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
show_rownames = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8
)
save_ggplot(DE_Biomin_broc, "clusters_clean/DE_Biomin_broc")
vst_counts <- assay(vsd)["Pocillopora_acuta_HIv2___RNAseq.g15280.t1", ]
df <- as.data.frame(colData(dds))
df$expression <- vst_counts
library(ggplot2)
ggplot(df, aes(x=Tissue, y=expression, group=Fragment, color=Fragment)) +
geom_point(size=3) +
geom_line() +
theme_minimal() +
labs(title="VST Expression by Tissue", y="VST normalized expression")
WNT <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))#|grepl("wnt", BiologicalProcess, ignore.case = TRUE))
select <- WNT$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "WNT_Hox_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 26 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/WNT_Hox_DE_SwissProt")
WNT_all <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))#|grepl("wnt", BiologicalProcess, ignore.case = TRUE))
WNT <- Manual %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE))
FRZ <- Manual %>% filter(grepl("frizzle", Heatmap_Label, ignore.case = TRUE))
HOX <- Manual %>% filter(grepl("hox", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE))
select <- WNT$query
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(WNT) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
plot_df <- plot_df %>%
mutate(chromosome_split = case_when(
chromosome == "Pocillopora_acuta_HIv2___xfSc0000014" & start < 3.6e6 ~ "xfSc0000014_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000014" & start >= 3.65e6 ~ "xfSc0000014_B",
TRUE ~ chromosome
))
plot_df$Heatmap_Label <- gsub("Protein ","",plot_df$Heatmap_Label)
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
heat_df <- heat_df %>%
left_join(plot_df %>% select(query, chromosome_split), by = "query") %>%
mutate(chromosome = chromosome_split)
library(gggenes)
library(ggnewscale)
Wnt_chr <- ggplot(plot_df, aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x")+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
Wnt_chr
save_ggplot(Wnt_chr, "Wnt_chromosome")
Wnt_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "free_x") +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(Wnt_chr_heatmap, "Wnt_chromosome_expression",width = 20, height = 5)
# Shift gene coordinates within each chromosome
plot_df_shifted <- plot_df %>%
group_by(chromosome) %>%
mutate(
chr_start = min(start),
start_shifted = start - chr_start + 10, # +10 if you want to avoid 0
end_shifted = end - chr_start + 10
) %>%
ungroup()
heat_df_shifted <- plot_df_shifted %>%
select(query, start_shifted, end_shifted, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start_shifted + end_shifted) / 2) # for positioning tile center
Wnt_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df_shifted,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df_shifted,
aes(xmin = start_shifted, xmax = end_shifted, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df_shifted,
aes(x = (start_shifted + end_shifted)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome, scales = "fixed",ncol=1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(Wnt_chr_heatmap, "Wnt_chromosome_expression_length_preserved",width = 30, height = 5)
HOX <- Manual %>% filter(grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE))
select <- HOX$query
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(HOX) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
plot_df <- plot_df %>%
mutate(chromosome_split = case_when(
chromosome == "Pocillopora_acuta_HIv2___xfSc0000002" & start < 2.2e6 ~ "xfSc0000002_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000002" & start >= 6.4e6 ~ "xfSc0000002_B",
chromosome == "Pocillopora_acuta_HIv2___Sc0000011" & start < 2e6 ~ "Sc0000011_A",
chromosome == "Pocillopora_acuta_HIv2___Sc0000011" & start >= 2e6 ~ "Sc0000011_B",
chromosome == "Pocillopora_acuta_HIv2___Sc0000024" & start < 420000 ~ "Sc0000024_A",
chromosome == "Pocillopora_acuta_HIv2___Sc0000024" & start >= 2300000 ~ "Sc0000024_B",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000008" & start < 2e6 ~ "xfSc0000008_A",
chromosome == "Pocillopora_acuta_HIv2___xfSc0000008" & start >= 2e6 ~ "xfSc0000008_B",
TRUE ~ chromosome
))
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
heat_df <- heat_df %>%
left_join(plot_df %>% select(query, chromosome_split), by = "query") %>%
mutate(chromosome = chromosome_split)
library(gggenes)
library(ggnewscale)
HOX_chr <- ggplot(plot_df[1:6,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome")
HOX_chr <- ggplot(plot_df[7:12,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome2")
HOX_chr <- ggplot(plot_df[13:19,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome3")
HOX_chr <- ggplot(plot_df[20:24,], aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_label(aes(x = (start + end)/2, y = "_Genes_ ", label = Heatmap_Label),
vjust = -1.75, size = 2, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome_split, scales = "free_x",ncol=1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(2, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
HOX_chr
save_ggplot(HOX_chr, "Hox_chromosome4")
HOX_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 0.5) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df,
aes(x = (start + end)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 2 , fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "free_x") +
theme_minimal() +
theme(
axis.text.y = element_text(size = 5),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +#scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot(HOX_chr_heatmap, "Hox_chromosome_expression",width = 20, height = 5)
# Shift gene coordinates within each chromosome
plot_df_shifted <- plot_df %>%
group_by(chromosome_split) %>%
mutate(
chr_start = min(start),
start_shifted = start - chr_start + 10, # +10 if you want to avoid 0
end_shifted = end - chr_start + 10
) %>%
ungroup()
heat_df_shifted <- plot_df_shifted %>%
select(query, start_shifted, end_shifted, width,chromosome_split, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start_shifted + end_shifted) / 2) # for positioning tile center
HOX_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df_shifted,
aes(x = midpoint, y = tissue, fill = expression,width = width),colour="black",
height = 1) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df_shifted,
aes(xmin = start_shifted, xmax = end_shifted, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_label(data = plot_df_shifted,
aes(x = (start_shifted + end_shifted)/2, y = "_Genes_", label = Heatmap_Label),
vjust = 1.5, size = 4, fill = "lightgrey", label.size = 0.1,fontface="bold")+
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome_split, scales = "fixed",ncol=1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +scale_y_discrete(expand = expansion(mult = c(1, 0)))+
labs(x = "Genomic position")
save_ggplot_big <- function(plot, filename, width = 10, height = 7, units = "in", dpi = 300) {
png_path <- file.path(outdir, paste0(filename, ".png"))
pdf_dir <- file.path(outdir, "pdf_figs")
pdf_path <- file.path(pdf_dir, paste0(filename, ".pdf"))
# Ensure the pdf_figs directory exists
if (!dir.exists(pdf_dir)) dir.create(pdf_dir, recursive = TRUE)
# Save plots
ggsave(filename = png_path, plot = plot, width = width, height = height, units = units, dpi = dpi,limitsize = FALSE)
ggsave(filename = pdf_path, plot = plot, width = width, height = height, units = units, dpi = dpi,limitsize = FALSE)
}
save_ggplot_big(HOX_chr_heatmap, "Hox_chromosome_expression_length_preserved",width = 15, height = 25)
select <- unique(DE_05_He_etal$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "He_etal_Nvec_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/He_etal_Nvec_DE_SwissProt")
#### all
labels <- DESeq_He_etal$def_short
select <- DESeq_He_etal$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = labels, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "He_etal_Nvec_all_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = labels,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 9 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/He_etal_Nvec_all_SwissProt")
gene_layout <- gff_transcripts %>%
filter(query %in% select) %>%
arrange(chromosome, start) %>% left_join(DESeq_He_etal) %>% distinct()
expr_df <- assay(vsd)[select, ] %>%
as.data.frame() %>%
rownames_to_column("query") %>%
pivot_longer(-query, names_to = "Sample", values_to = "vsd_expr") %>%
left_join(as.data.frame(colData(vsd)), by = "Sample") %>%
group_by(query, Tissue) %>%
summarise(mean_expr = mean(vsd_expr), .groups = "drop") %>%
pivot_wider(names_from = Tissue, values_from = mean_expr)
plot_df <- gene_layout %>%
left_join(expr_df, by = "query")
plot_df <- plot_df %>%
mutate(highest_expr_tissue = case_when(
OralEpi > Aboral ~ "OralEpi",
Aboral > OralEpi ~ "Aboral",
TRUE ~ "equal"
))
heat_df <- plot_df %>%
select(query, start, end, width,chromosome, OralEpi, Aboral) %>%
pivot_longer(cols = c(OralEpi, Aboral), names_to = "tissue", values_to = "expression") %>%
mutate(midpoint = (start + end) / 2) # for positioning tile center
library(gggenes)
library(ggnewscale)
Hox_chr <- ggplot(plot_df, aes(xmin = start, xmax = end, y = "Hox genes", forward = strand == "+",
fill = highest_expr_tissue)) +
geom_gene_arrow(arrowhead_height = unit(3, "mm"), arrowhead_width = unit(1.5, "mm")) +
geom_gene_label(aes(label = Gene_Name), size = 50) +
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey")) +
theme_minimal() +
facet_wrap(~ chromosome, scales = "free_x", ncol = 1)+
theme(axis.text.y = element_blank(),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")) +
labs(x = "Genomic position", fill = "Higher expression in")
Hox_chr_heatmap <- ggplot() +
# Heatmap tiles
geom_tile(data = heat_df,
aes(x = midpoint, y = tissue, fill = expression),colour="black",
height = 0.4) +
# Color for heatmap (expression level)
scale_fill_gradient(low = "white", high = "firebrick", name = "Expression") +
new_scale_fill() +
# Gene arrows
geom_gene_arrow(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_",
forward = strand == "+", fill = highest_expr_tissue),
arrowhead_height = unit(3, "mm"),
arrowhead_width = unit(1.5, "mm")) +
geom_gene_label(data = plot_df,
aes(xmin = start, xmax = end, y = "_Genes_", label = Gene_Name),
size = 50) +
scale_fill_manual(values = c("OralEpi" = "palegreen3", "Aboral" = "mediumpurple1", "equal" = "grey"),
name = "Higher expression in") +
facet_wrap(~ chromosome, scales = "free_x", ncol = 1) +
theme_minimal() +
theme(
axis.text.y = element_text(size = 10),
axis.title.y = element_blank(),
panel.spacing = unit(.1, "lines")
) +
labs(x = "Genomic position")
save_ggplot_big(Hox_chr, "He_etal_Nvec_all_SwissProt_chromosome", width = 20, height = 40)
save_ggplot_big(Hox_chr_heatmap, "He_etal_Nvec_all_SwissProt_chromosome_expression", width = 20, height = 40)
select <- unique(DE_05_DuBuc_etal$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "DuBuc_etal_Nvec_DE_SwissProt")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white"
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/DuBuc_etal_Nvec_DE_SwissProt")
DESeq_He_etal$source <- "He_etal_2023"
DESeq_DuBuc_etal$source <- "DuBuc_etal_2018"
Hox_Literature <- rbind(DESeq_He_etal,DESeq_DuBuc_etal)
DESeq_Hox_Literature_collapsed <- Hox_Literature %>%
group_by(query, baseMean, log2FoldChange, lfcSE, pvalue, padj) %>%
summarize(
Gene_Name = paste(unique(Gene_Name), collapse = "; "),
Description = paste(unique(Description), collapse = "; "),
def_short = paste(unique(def_short), collapse = "; "),
source = paste(unique(source), collapse = "; "),
.groups = "drop"
)
DE_05_Hox_Literature_collapsed <- DESeq_Hox_Literature_collapsed %>% filter(padj<0.05) %>% filter(abs(log2FoldChange) >1)
WNT_HOX_MAN <- Manual %>% select(-X) %>% filter(grepl("wnt", Heatmap_Label, ignore.case = TRUE)|grepl("frizzle", Heatmap_Label, ignore.case = TRUE)|grepl("homeobox", Heatmap_Label, ignore.case = TRUE)|grepl("hox", Heatmap_Label, ignore.case = TRUE)) %>% select(-c(blast_hit,evalue,ProteinNames,BiologicalProcess,GeneOntologyIDs))# %>% rename(Description = Heatmap_Label)
COMBINED_HOX_WNT_all <- full_join(DESeq_Hox_Literature_collapsed,WNT_HOX_MAN,by=c("query")) %>%
mutate(
baseMean = coalesce(baseMean.x, baseMean.y),
log2FoldChange = coalesce(log2FoldChange.x, log2FoldChange.y),
lfcSE = coalesce(lfcSE.x, lfcSE.y),
pvalue = coalesce(pvalue.x, pvalue.y),
padj = coalesce(padj.x, padj.y)
) %>%
select(-ends_with(".x"), -ends_with(".y"))
COMBINED_HOX_WNT_DE05 <- full_join(DE_05_Hox_Literature_collapsed,WNT_HOX_MAN,by=c("query")) %>%
mutate(
baseMean = coalesce(baseMean.x, baseMean.y),
log2FoldChange = coalesce(log2FoldChange.x, log2FoldChange.y),
lfcSE = coalesce(lfcSE.x, lfcSE.y),
pvalue = coalesce(pvalue.x, pvalue.y),
padj = coalesce(padj.x, padj.y)
) %>%
select(-ends_with(".x"), -ends_with(".y"))
select <- unique(COMBINED_HOX_WNT_DE05$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "Hox_All_DE")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(2,5,12,14,19,25)
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/Hox_All_DE")
COMBINED_HOX_WNT_all <- COMBINED_HOX_WNT_all %>% mutate(Heatmap_Label = coalesce(Heatmap_Label, def_short))
COMBINED_HOX_WNT_all$Heatmap_Label <- gsub("Protein Wnt", "Wnt", COMBINED_HOX_WNT_all$Heatmap_Label, ignore.case = TRUE)
select <- unique(COMBINED_HOX_WNT_all$query)
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = COMBINED_HOX_WNT_all$Heatmap_Label, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "Hox_All")
# Perform clustering
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = COMBINED_HOX_WNT_all$Heatmap_Label,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(2,5,16,18,19,24,33)
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/Hox_All")
mucin <- Manual %>% filter(grepl("mucin", ProteinNames, ignore.case = TRUE)|grepl("toll-", ProteinNames, ignore.case = TRUE)|grepl("ZP", ProteinNames, ignore.case = TRUE)|grepl("lectin", ProteinNames, ignore.case = TRUE)|grepl("nitric", Heatmap_Label, ignore.case = TRUE)) %>% filter(Heatmap_Label !="Cnidocyte marker protein (Collectin-11)")
select <- unique(mucin$query)
select <- select[(select %in% rownames(vsd))]
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "bacteria_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate the heatmap with reordered rows and labels
mucins <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(20,33,38,41)
)
# Save the heatmap
save_ggplot(mucins, "clusters_clean/bacteria_DE_SwissProt")
solute_all <- DESeq_SwissProt %>% filter(grepl("Solute carrier", ProteinNames, ignore.case = TRUE))
solute_all <- solute_all %>%
mutate(SLC_family = str_extract(ProteinNames, "Solute carrier family \\d+")) %>%
mutate(SLC_family = str_replace(SLC_family,"Solute carrier family ", "SLC"))
nrow(solute_all)
## [1] 211
solute_de <- Manual %>% filter(grepl("Solute carrier", ProteinNames, ignore.case = TRUE))
solute_de <- solute_de %>%
mutate(SLC_family = str_extract(ProteinNames, "Solute carrier family \\d+")) %>%
mutate(SLC_family = str_replace(SLC_family,"Solute carrier family ", "SLC"))
solute_de_upAboral <- solute_de %>% filter(log2FoldChange > 1)
nrow(solute_de_upAboral)
## [1] 9
solute_de_upOral <- solute_de %>% filter(log2FoldChange < -1)
nrow(solute_de_upOral)
## [1] 21
length(unique(solute_all$SLC_family))
## [1] 45
length(unique(solute_de_upAboral$SLC_family))
## [1] 8
length(unique(solute_de_upOral$SLC_family))
## [1] 10
table(solute_de$SLC_family)
##
## SLC1 SLC12 SLC15 SLC16 SLC22 SLC23 SLC24 SLC25 SLC30 SLC32 SLC35 SLC46 SLC6
## 2 3 1 4 2 2 1 2 1 1 1 1 5
## SLC7
## 4
#Basing my annotations on: https://re-solute.eu/knowledgebase/slcfamily and https://slc.bioparadigms.org/
slc_map <- data.frame(
SLC_family = c(
"SLC1",
"SLC12",
"SLC15",
"SLC16",
"SLC22",
"SLC23",
"SLC24",
"SLC25",
"SLC30",
"SLC32",
"SLC35",
"SLC46",
"SLC6",
"SLC7"
),
Function = c(
"High-affinity glutamate and neutral amino acid transporter family",
"Electroneutral cation-coupled Cl cotransporter family",
"Proton oligopeptide cotransporter family",
"Monocarboxylate transporter family",
"Organic cation/anion/zwitterion transporter family",
"Sodium-dependent ascorbic acid transporter family",
"Na+/(Ca2+-K+) exchanger family",
"Mitochondrial carrier family",
"Zinc transporter family",
"Vesicular inhibitory amino acid transporter family",
"Nucleoside-sugar transporter family",
"Folate transporter family",
"Sodium- and chloride-dependent neurotransmitter transporter family",
"Cationic amino acid transporter/glycoprotein-associated family"
),
Role = c(
"Neurotransmission, Glu metabolism, development, aa homeostasis",
"cell volume, chloride concentration, blood pressure regulation",
"Immunity, homeostasis of neuropeptides, peptide absoption",
"glucose homeostasis / metabolism, lipid metabolism",
"Drug uptake, endocytosis",
"Absortion and distribution of ascorbic acid",
"Skin/hair/eye pigmentation, vision, olfaction",
"Apoptosis, Glucose/Lipid/Amino acid/Urea, metab., ATP synthesis",
"Zinc homestasis, development, body weight, glucose metabolism",
"Development, neurotransmission",
"Virus infection, development, autophagy, cell proliferation",
"Folate metabolism, Immunity, Drug uptake",
"Cell differentiation, neurotransmission",
"Nitric oxide synthesis, redox balance, aa homeostasis"
),
Substrate_Specific = c(
"Glutamate as Neurotransmitter, anionic, neutral amino acids",
"Chloride, sodium, potassium",
"Dipeptides, oligopeptides",
"Ketone bodies, Lactate, Pyruvate, creatine, aromatic amino acids, T2,T3,T4",
"Drugs, neurotransmitters, purine and pyrimidine nucleosides, niacin, steroid, carnitine, carnitine derivatives, organic anions",
"Ascorbic acid",
"Calcium, Potassium, Sodium",
"Amino acids, Di- tricarboxylic ions, Co-A, Carnitine/acyl-carnitines, H+, metals, nucleotides",
"Zinc, Managnese, Lead",
"GABA, Glycine",
"Purines, pyrimidines, Thiamine, nuclotide-sugars",
"Folic acid, Heme, Methotrexate",
"Neurotransmitters, basic, neutral amino acids",
"Amino acids"
),
Substrate_Broad = c(
"amino acids",
"inorganic ions",
"peptides",
"energy metabolites",
"organic ions",
"ascorbic acid",
"inorganic ions",
"mitochondrial",
"transition element cations",
"amino acids; neurotransmitter",
"nucleosides, nucleotides and nucleotide-sugars",
"organic ions",
"neurotransmitter",
"amino acids"
)
)
solute_de_annot <- solute_de %>%
left_join(slc_map, by = "SLC_family") %>%
mutate(Direction = ifelse(log2FoldChange > 1, "upAboral",
ifelse(log2FoldChange < -1, "upOral", "noChange")))
# Summarize counts by family and direction
plot_data <- solute_de_annot %>%
filter(Direction != "noChange") %>%
group_by(SLC_family, Substrate_Broad, Direction) %>%
summarise(n = n(), .groups = "drop")
ggplot(plot_data, aes(x = SLC_family, y = n, fill = Direction)) +
geom_bar(stat = "identity") +
facet_wrap(~Substrate_Broad, scales = "free_x") +
scale_fill_manual(values = c("upOral" = "darkgreen", "upAboral" = "purple")) +
theme_minimal() +
labs(title = "Differentially Expressed SLC Genes by Family",
x = "SLC Family", y = "Number of DE Genes") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
select <- solute_de$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "SLC_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate the heatmap with reordered rows and labels
SLC <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white",
gaps_row = c(21)
)
# Save the heatmap
save_ggplot(SLC, "clusters_clean/SLC_DE_SwissProt")
sensors <- Manual %>% filter(grepl("TRP", Heatmap_Label, ignore.case = TRUE)|grepl("cellular response to light stimulus", BiologicalProcess, ignore.case = TRUE)|grepl("detection of mechanical stimulus involved in sensory perception", BiologicalProcess, ignore.case = TRUE))
gene_labels <- Manual %>%
select(query,Heatmap_Label) %>%
mutate_all(~ ifelse(is.na(.), "", .)) #replace NAs with "" for labelling purposes
select <- sensors$query
z_scores <- t(scale(t(assay(vsd)[select, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=TRUE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_colors = ann_colors,
labels_row = gene_labels[match(select,(gene_labels$query)),2], fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "sensors_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select,
Heatmap_Label = gene_labels[match(select, gene_labels$query), 2],
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
mutate(cluster = factor(cluster, levels = c(1, 3, 2))) %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Generate the heatmap with reordered rows and labels
transporters_heat <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = c(7) # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(transporters_heat, "clusters_clean/sensors_DE_SwissProt")
select <- DE_05_HeatStressGenes %>% arrange(response_type) %>% select(query,gene_name,response_type) %>% unique()
rownames(select) <- select$query
z_scores <- t(scale(t(assay(vsd)[select$query, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_row = (select %>% select(response_type)), annotation_colors = ann_colors,
labels_row = select$gene_name, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "HeatStress_DE_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select$query,
Heatmap_Label = select$gene_name,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
annotation_row = (select %>% select(response_type)),
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/HeatStress_DE_SwissProt")
select <- DESeq_HeatStressGenes %>% arrange(response_type) %>% select(query,gene_name,response_type) %>%
group_by(query) %>% #collapse duplicate rows
summarise(
gene_name = paste(unique(gene_name), collapse = "; "),
response_type = paste(unique(response_type), collapse = "; "),
.groups = "drop"
) %>% arrange(response_type,gene_name)
select <- data.frame(select)
rownames(select) <- select$query
z_scores <- t(scale(t(assay(vsd)[select$query, ]), center = TRUE, scale = TRUE))
top50_DE <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200), cluster_rows=FALSE, show_rownames=TRUE,
cluster_cols=TRUE, cutree_cols = 2,annotation_col=(heatmap_metadata%>% select(Tissue)), annotation_row = (select %>% select(response_type)), annotation_colors = ann_colors,
labels_row = select$gene_name, fontsize_row = 8, cutree_rows = 2)
top50_DE
save_ggplot(top50_DE, "HeatStress_all_SwissProt")
# Perform clustering
row_clusters <- hclust(dist(z_scores))
cluster_assignments <- cutree(row_clusters, k = 2) # Adjust k for the number of clusters
# Create a dataframe for clustering and label management
clustered_data <- data.frame(
query = select$query,
Heatmap_Label = select$gene_name,
cluster = cluster_assignments
)
# Reorder rows within each cluster alphabetically by their labels
clustered_data <- clustered_data %>%
arrange(cluster, Heatmap_Label)
# Reorder the z_scores matrix and labels based on the new order
z_scores <- z_scores[match(clustered_data$query, rownames(z_scores)), ]
ordered_labels <- clustered_data$Heatmap_Label
# Ensure there is only 1 gap between clusters
#row_breaks <- which(cluster_assignments != cluster_assignments[1]) # Only where cluster switches
# Generate the heatmap with reordered rows and labels
top50_DE <- pheatmap(
z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = FALSE, # Disable clustering since rows are pre-ordered
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = (heatmap_metadata %>% select(Tissue)),
annotation_colors = ann_colors,
labels_row = ordered_labels,
annotation_row = (select %>% select(response_type)),
fontsize_row = 8,
borders_color = "white", # Optional border around clusters
gaps_row = 4 # Add breaks only between clusters
)
# Save the heatmap
save_ggplot(top50_DE, "clusters_clean/HeatStress_all_SwissProt")
# Function to generate short names for proteins
generate_short_name <- function(data) {
data %>%
mutate(short_name = ifelse(nchar(ProteinNames) > 60,
paste0(substr(ProteinNames, 1, 57), "..."),
ProteinNames))
}
# Function to create gene labels
create_gene_labels <- function(data) {
data %>%
select(query, short_name) %>%
mutate_all(~ ifelse(is.na(.), "", .)) # Replace NAs with ""
}
# Function to generate z-scores for selected genes
calculate_z_scores <- function(data, selection, vsd_matrix) {
selected_genes <- match(selection, rownames(vsd_matrix))
selected_genes <- selected_genes[!is.na(selected_genes)] # Remove NAs
t(scale(t(assay(vsd_matrix)[selected_genes, ]), center = TRUE, scale = TRUE))
}
# Function to create and save heatmap
create_heatmap <- function(z_scores, labels_row, annotation_col, annotation_colors, filename) {
heatmap <- pheatmap(z_scores,
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
cutree_cols = 2,
annotation_col = annotation_col,
annotation_colors = annotation_colors,
labels_row = labels_row,
fontsize_row = 6)
save_ggplot(heatmap, filename)
}
# Process DE_05_SwissProt
DE_05_SwissProt <- generate_short_name(DE_05_SwissProt)
gene_labels <- create_gene_labels(DE_05_SwissProt)
TRP <- DE_05_SwissProt %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
select <- TRP$query
z_scores <- calculate_z_scores(DE_05_SwissProt, select, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "TRP_DE_SwissProt")
# Process annot_tab
annot_tab <- generate_short_name(annot_tab)
gene_labels <- create_gene_labels(annot_tab)
TRP <- annot_tab %>% filter(grepl("transient", ProteinNames, ignore.case = TRUE))
TRP <- left_join(TRP, as.data.frame(resOrdered)) %>% filter(!is.na(log2FoldChange))
select1 <- TRP$query
z_scores <- calculate_z_scores(annot_tab, select1, vsd)
create_heatmap(z_scores,
labels_row = TRP$short_name,
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "TRP_SwissProt")
significant <- ifelse(row.names(z_scores) %in% DE_05$query, "Significant", "Not Significant")
# Add this as a new annotation
row_annotation <- data.frame(Significance = significant)
rownames(row_annotation) <- rownames(z_scores)
row_annotation_colors <- list(Significance = c("Significant" = "red", "Not Significant" = "grey"))
heatmap <- pheatmap(z_scores, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(200),
cluster_rows = TRUE,
show_rownames = TRUE,
cluster_cols = TRUE,
cutree_cols = 2,
cutree_rows = 5,
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
annotation_row = row_annotation,
annotation_row_colors = row_annotation_colors,
labels_row = TRP$short_name,
fontsize_row = 12)
save_ggplot(heatmap, "TRP_SwissProt")
# Process DE_05_SwissProt
DE_05_SwissProt <- generate_short_name(DE_05_SwissProt)
gene_labels <- create_gene_labels(DE_05_SwissProt)
GFP <- DE_05_SwissProt %>% filter(grepl("biolum", BiologicalProcess, ignore.case = TRUE))
select <- GFP$query
z_scores <- calculate_z_scores(DE_05_SwissProt, select, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "GFP_DE_SwissProt")
# Process annot_tab
annot_tab <- generate_short_name(annot_tab)
gene_labels <- create_gene_labels(annot_tab)
GFP <- annot_tab %>% filter(grepl("biolum", BiologicalProcess, ignore.case = TRUE))
select1 <- GFP$query
z_scores <- calculate_z_scores(annot_tab, select1, vsd)
create_heatmap(z_scores,
labels_row = gene_labels[match(select1, gene_labels$query), 2],
annotation_col = heatmap_metadata %>% select(Tissue),
annotation_colors = ann_colors,
filename = "GFP_SwissProt")
Annot_Pdam <- read.csv("../references/annotation/blastp_Pdam_out.tab", sep = '\t', header = FALSE) %>% select(c(1,2)) %>% dplyr::rename("protein_id" = "V2", "query" = "V1")
Manual_Pdam <- left_join(Manual, Annot_Pdam)
library(readxl)
larval_aboral_enriched <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval.xlsx", sheet = "pocillopora_aboral_enriched_05_")
Manual_Pdam_upaboral <- Manual_Pdam %>% filter(log2FoldChange > 0)
inner_join(larval_aboral_enriched, Manual_Pdam_upaboral, by=c("gene ID"="protein_id")) %>% dim()
## [1] 17 26
#18 genes overlap from the 126 in the paper
larval_oral_enriched <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval.xlsx", sheet = "pocillopora_oral_enriched_05_lf")
Manual_Pdam_uporal <- Manual_Pdam %>% filter(log2FoldChange < 0)
inner_join(larval_oral_enriched, Manual_Pdam_uporal, by=c("gene ID"="protein_id")) %>% dim()
## [1] 6 26
inner_join(larval_oral_enriched, Manual_Pdam_uporal, by=c("gene ID"="protein_id")) %>% print()
## # A tibble: 6 × 26
## `gene ID` baseMean.x log2FoldChange.x lfcSE.x stat pvalue.x padj.x
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 XP_027046532.1 88.9 4.29 0.652 5.04 4.54e- 7 8.03e- 5
## 2 XP_027036583.1 563. 3.04 0.287 7.12 1.06e-12 4.11e-10
## 3 XP_027054215.1 918. 2.57 0.275 5.71 1.14e- 8 2.77e- 6
## 4 XP_027054196.1 855. 2.49 0.263 5.65 1.59e- 8 3.76e- 6
## 5 XP_027058731.1 1372. 2.44 0.270 5.35 8.95e- 8 1.85e- 5
## 6 XP_027040620.1 1782. 1.83 0.241 3.45 5.66e- 4 4.98e- 2
## # ℹ 19 more variables: `pfam domain` <chr>, `signal peptide` <chr>,
## # `astroides ortholog` <chr>, `astroides oral` <chr>,
## # `clytia ortholog` <chr>, `clytia oral` <chr>, X <int>, query <chr>,
## # baseMean.y <dbl>, log2FoldChange.y <dbl>, lfcSE.y <dbl>, pvalue.y <dbl>,
## # padj.y <dbl>, Heatmap_Label <chr>, blast_hit <chr>, evalue <dbl>,
## # ProteinNames <chr>, BiologicalProcess <chr>, GeneOntologyIDs <chr>
#only 6/83 genes overlap
larval_aboral_clusters <- read_excel("../output_RNA/marker_genes/RamonMateu_Pdam_larval_scRNA.xlsx") %>% filter(!is.na(pocillopora))
larval_aboral_clusters_Pacuta <- inner_join(larval_aboral_clusters, Manual_Pdam, by=c("pocillopora"="protein_id"))
#my genes upregulated in oral tissue are high in mucous cells, cool
After you’ve confirmed your code works as expected, use renv::snapshot() to record the packages and their sources in the lockfile.
renv::snapshot()